1 Introduction

The objective of this notebook is to refine the clustering annotation done at level 4. This refinement is the result of a manual curation carried out by specialists to remove poor quality cells, misclassified cells or clusters with very few cells and redefine possible new clusters.

Taking into account the data from scRNAseq, we will proceed to:

  • Remove of 1 cluster: mitochondrial+ T cells.
  • Subcluster Follicular Th CXCL13+CBLB+ into 2 clusters

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(Signac)
library(tidyverse)
library(reshape2)
library(ggpubr)
library(harmony)

2.2 Parameters

cell_type = "CD4_T"

# Paths
path_to_obj <- str_c(
  here::here("scATAC-seq/results/R_objects/level_4/"),
  cell_type,
  "/04.",
  cell_type,
  "_integration_peak_calling_level_4.rds",
  sep = ""
)

path_to_obj_RNA <- str_c(
  here::here("scRNA-seq/3-clustering/5-level_5/"),
  cell_type,
    "/",
  cell_type,
  "_subseted_integrated_level_5.rds",
  sep = ""
)

# Functions
source(here::here("scRNA-seq/bin/utils.R"))


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", 
                    "#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0", 
                    "#1C8356", "#FEAF16", "#822E1C", "#C4451C", 
                    "#1CBE4F", "#325A9B", "#F6222E")


path_to_level_5 <- here::here("scATAC-seq/results/R_objects/level_5/CD4_T/")
path_to_save <- str_c(path_to_level_5, "01.CD4_T_integrated_level_5.rds")

2.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 93602 features across 19288 samples within 1 assay 
## Active assay: peaks_redefined (93602 features, 93382 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
seurat_RNA <- readRDS(path_to_obj_RNA)
seurat_RNA
## An object of class Seurat 
## 37378 features across 56794 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony

2.4 Visualization of the data

p1 <- DimPlot(seurat,
      group.by = "annotation_level_3",
      cols = color_palette,
      pt.size = 0.2)

p2 <- DimPlot(seurat_RNA,
  group.by = "annotation_level_5",
  pt.size = 0.1,cols = color_palette)

p1

p2

3 Removing mitochondrial+ T cells.

selected_cells_clusters <- colnames(seurat)[!(seurat$annotation_level_3 == "Mitochondrial+ T cells")]
seurat <- subset(seurat, cells = selected_cells_clusters)
seurat$annotation_level_3 <- droplevels(seurat$annotation_level_3)

table(seurat$annotation_level_3)
## 
##                 CD200+TOX+        CD4 Non-Tfh KLRB1+           Central Mem PASK-          Central Mem PASK+ Follicular Th CXCL13+CBLB+       Follicular Th CXCR5+        Follicular Th TOX2+          IL2RA+FOXP3+ Treg             Memory T cells                      Naive          naive Treg IKZF2+                       Th17           Treg IKZF2+HPGD+ 
##                        440                        896                        288                       1989                       1175                       2520                       2055                        826                        322                       4573                        415                        650                        234

3.1 Visualization after removing problematic cells

DimPlot(seurat,
      group.by = "annotation_level_3",
      cols = color_palette,
      pt.size = 0.1)

4 Integration

seurat <- seurat %>%
  RunTFIDF() %>%
  FindTopFeatures(min.cutoff = 10) %>%
  RunSVD()

DepthCor(seurat)

seurat <- RunUMAP(object = seurat, reduction = 'lsi', dims = 2:40)

DimPlot(seurat,
      group.by = "annotation_level_3",
      cols = color_palette,
      pt.size = 0.2)

seurat <- RunHarmony(
  object = seurat,
  dims = 2:40,
  group.by.vars = 'assay',
  reduction = 'lsi',
  assay.use = 'peaks_redefined',
  project.dim = FALSE,
  max.iter.harmony = 20
)

seurat <- RunUMAP(seurat, reduction = "harmony", dims = 2:40)
seurat <- FindNeighbors(seurat, reduction = "harmony", dims = 2:40)

DimPlot(seurat,
      group.by = "annotation_level_3",
      cols = color_palette,
      pt.size = 0.2)

5 Save

saveRDS(seurat, path_to_save)

6 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] harmony_1.0          Rcpp_1.0.6           ggpubr_0.4.0         reshape2_1.4.4       forcats_0.5.0        stringr_1.4.0        dplyr_1.0.7          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.5        tidyverse_1.3.0      Signac_1.2.1.9003    SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1             reticulate_1.20        tidyselect_1.1.1       htmlwidgets_1.5.3      grid_4.0.3             docopt_0.7.1           BiocParallel_1.22.0    Rtsne_0.15             munsell_0.5.0          codetools_0.2-17       ica_1.0-2              future_1.21.0          miniUI_0.1.1.1         withr_2.4.2            colorspace_2.0-2       knitr_1.30             rstudioapi_0.12        stats4_4.0.3           ROCR_1.0-11            ggsignif_0.6.0         tensor_1.5             listenv_0.8.0          labeling_0.4.2         slam_0.1-48            GenomeInfoDbData_1.2.3 polyclip_1.10-0        farver_2.1.0           rprojroot_2.0.2        parallelly_1.26.1      vctrs_0.3.8            generics_0.1.0         xfun_0.18              lsa_0.73.2             ggseqlogo_0.1          R6_2.5.0               GenomeInfoDb_1.24.0    rsvd_1.0.3             bitops_1.0-7           spatstat.utils_2.2-0   assertthat_0.2.1       promises_1.2.0.1       scales_1.1.1           gtable_0.3.0           globals_0.14.0         goftest_1.2-2          rlang_0.4.11           RcppRoll_0.3.0         splines_4.0.3          rstatix_0.6.0          lazyeval_0.2.2         spatstat.geom_2.2-0   
##  [52] broom_0.7.2            BiocManager_1.30.10    yaml_2.2.1             abind_1.4-5            modelr_0.1.8           backports_1.2.0        httpuv_1.6.1           tools_4.0.3            bookdown_0.21          ellipsis_0.3.2         spatstat.core_2.2-0    RColorBrewer_1.1-2     BiocGenerics_0.34.0    ggridges_0.5.3         plyr_1.8.6             zlibbioc_1.34.0        RCurl_1.98-1.2         rpart_4.1-15           deldir_0.2-10          pbapply_1.4-3          cowplot_1.1.1          S4Vectors_0.26.0       zoo_1.8-9              haven_2.3.1            ggrepel_0.9.1          cluster_2.1.0          here_1.0.1             fs_1.5.0               magrittr_2.0.1         RSpectra_0.16-0        data.table_1.14.0      scattermore_0.7        openxlsx_4.2.3         lmtest_0.9-38          reprex_0.3.0           RANN_2.6.1             SnowballC_0.7.0        fitdistrplus_1.1-5     matrixStats_0.59.0     hms_0.5.3              patchwork_1.1.1        mime_0.11              evaluate_0.14          xtable_1.8-4           rio_0.5.16             sparsesvd_0.2          readxl_1.3.1           IRanges_2.22.1         gridExtra_2.3          compiler_4.0.3         KernSmooth_2.23-17    
## [103] crayon_1.4.1           htmltools_0.5.1.1      mgcv_1.8-33            later_1.2.0            lubridate_1.7.9        DBI_1.1.0              tweenr_1.0.2           dbplyr_1.4.4           MASS_7.3-53            Matrix_1.3-4           car_3.0-10             cli_3.0.0              parallel_4.0.3         igraph_1.2.6           GenomicRanges_1.40.0   pkgconfig_2.0.3        foreign_0.8-80         plotly_4.9.4.1         spatstat.sparse_2.0-0  xml2_1.3.2             XVector_0.28.0         rvest_0.3.6            digest_0.6.27          sctransform_0.3.2      RcppAnnoy_0.0.18       spatstat.data_2.1-0    Biostrings_2.56.0      rmarkdown_2.5          cellranger_1.1.0       leiden_0.3.8           fastmatch_1.1-0        uwot_0.1.10.9000       curl_4.3.2             shiny_1.6.0            Rsamtools_2.4.0        lifecycle_1.0.0        nlme_3.1-150           jsonlite_1.7.2         carData_3.0-4          viridisLite_0.4.0      fansi_0.5.0            pillar_1.6.1           lattice_0.20-41        fastmap_1.1.0          httr_1.4.2             survival_3.2-7         glue_1.4.2             remotes_2.2.0          zip_2.1.1              qlcMatrix_0.9.7        png_0.1-7             
## [154] ggforce_0.3.3          stringi_1.6.2          blob_1.2.1             irlba_2.3.3            future.apply_1.7.0